10 years ago · caa2132b99
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,5 +1,6 @@
 
                 # Changes
              
 
                 
              
 
                +* Jun 19, 2015   - Add `url_from_event` to WebsiteAgent.
              
 
                 * Jun 17, 2015   - RssAgent emits events for new feed items in chronological order.
              
 
                 * Jun 15, 2015   - Liquid filter `uri_expand` added.
              
 
                 * Jun 12, 2015   - RssAgent can now accept an array of URLs.
              
--- a/app/models/agents/website_agent.rb
+++ b/app/models/agents/website_agent.rb
@@ -19,7 +19,7 @@ module Agents
 
                 
              
 
                       `url` can be a single url, or an array of urls (for example, for multiple pages with the exact same structure but different content to scrape)
              
 
                 
              
 
                -      The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload. If you specify `merge` as the `mode`, it will retain the old payload and update it with the new values.
              
 
                +      The WebsiteAgent can also scrape based on incoming events. By default it scrapes the url contained in the `url` key of the incoming event payload; if you set `url_from_event`, that option is instead used as a Liquid template to generate the url to access. If you specify `merge` as the `mode`, it will retain the old payload and update it with the new values.
              
 
                 
              
 
                       # Supported Document Types
              
 
                 
              
@@ -135,7 +135,8 @@ module Agents
 
                 
              
 
                     def validate_options
              
 
                       # Check for required fields
              
 
                -      errors.add(:base, "url and expected_update_period_in_days are required") unless options['expected_update_period_in_days'].present? && options['url'].present?
              
 
                +      errors.add(:base, "either url or url_from_event is required") unless options['url'].present? || options['url_from_event'].present?
              
 
                +      errors.add(:base, "expected_update_period_in_days is required") unless options['expected_update_period_in_days'].present?
              
 
                       if !options['extract'].present? && extraction_type != "json"
              
 
                         errors.add(:base, "extract is required for all types except json")
              
 
                       end
              
@@ -257,7 +258,12 @@ module Agents
 
                     def receive(incoming_events)
              
 
                       incoming_events.each do |event|
              
 
                         interpolate_with(event) do
              
 
                -          url_to_scrape = event.payload['url']
              
 
                +          url_to_scrape =
              
 
                +            if url_template = options['url_from_event'].presence
              
 
                +              interpolate_string(url_template)
              
 
                +            else
              
 
                +              event.payload['url']
              
 
                +            end
              
 
                           check_url(url_to_scrape,
              
 
                                     interpolated['mode'].to_s == "merge" ? event.payload : {})
              
 
                         end
              
--- a/spec/models/agents/website_agent_spec.rb
+++ b/spec/models/agents/website_agent_spec.rb
@@ -633,6 +633,17 @@ fire: hot
 
                         }.to change { Event.count }.by(1)
              
 
                       end
              
 
                 
              
 
                +      it "should use url_from_event as url to scrape if it exists when receiving an event" do
              
 
                +        stub = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Fxkcd.com')
              
 
                +
              
 
                +        @checker.options = @valid_options.merge(
              
 
                +          'url_from_event' => 'http://example.org/?url={{url | uri_escape}}'
              
 
                +        )
              
 
                +        @checker.receive([@event])
              
 
                +
              
 
                +        expect(stub).to have_been_requested
              
 
                +      end
              
 
                +
              
 
                       it "should interpolate values from incoming event payload" do
              
 
                         expect {
              
 
                           @valid_options['extract'] = {